source("./code/loadPackages.R") # Install and load necessary packages

#### Appendix 7.5: Topic Model Approach to Speaker Topics ####

# Load necessary data from fitted keyATM
# NOTE(review): this file presumably supplies `outAll` and `segsATM`, which are
# used below without being created in this script -- confirm.
load("./data/keyATMData.RData")

#### Tables A45 and A46: Most Frequent Terms for All KeyATM Topics ####
# `tts` is treated as one column per topic; the second argument (20) is
# presumably the number of terms requested per topic -- confirm against keyATM.
tts <- top_words(outAll, 20)

# Marker characters in the pasted term strings are swapped for plain letters
# before the table is typeset.
# NOTE(review): these look like keyATM's keyword flags (a check mark and a
# keyword-topic number) -- confirm. The swap is a plain substring replacement,
# so a digit 1-5 occurring inside a term itself is replaced as well.
markerMap <- c("✓" = "X", "1" = "T", "2" = "B", "3" = "D", "4" = "V", "5" = "A")

# Preallocate one slot per topic. seq_len() makes the loop a no-op when `tts`
# has zero columns, whereas 1:ncol(tts) would iterate over c(1, 0).
topwordsList <- vector("list", ncol(tts))
for (i in seq_len(ncol(tts))) {
  onetopic <- tts[, i]

  # Collapse the topic's terms into one comma-separated string
  topwords <- paste(onetopic, collapse = ", ")
  for (marker in names(markerMap)) {
    topwords <- gsub(marker, markerMap[[marker]], topwords)
  }

  # One row per topic: topic name plus its term string
  topwordsList[[i]] <- data.frame(topic = names(tts)[i], topwords)
}
topwordsData <- bind_rows(topwordsList)
print(xtable(topwordsData), include.rownames = FALSE)



#### Appendix 7.5.2: Topic Validation ####

# Validation data on speech acts: the topic assigned by keyATM (topicActual)
# and the RA's guess of the topic (topicGuess)
topic_valid <- fread("./data/keyATMTopicValidation.csv")

# Both topic columns use the same raw codes and the same display labels
# (note that the raw code "power" is displayed as "Balance")
topicLevels <- c("violence", "threat", "power", "diplomacy", "adversary")
topicLabels <- c("Violence", "Threat", "Balance", "Diplomacy", "Adversary")

topic_valid$topicActual <- factor(topic_valid$topicActual,
                                  levels = topicLevels, labels = topicLabels)
topic_valid$topicGuess <- factor(topic_valid$topicGuess,
                                 levels = topicLevels, labels = topicLabels)

#### Table A47: Comparison of Hand-Coded Topics with Topics Extracted from KeyATM ####
# Rows are the RA's guesses, columns are the keyATM topics
table(topic_valid$topicGuess, topic_valid$topicActual)

#### Table A48: Performance Metrics for Hand-Coded Topics ####
# The guess is passed first and the keyATM topic second; only the per-label
# component of the result is printed
metrics <- crf_evaluation(topic_valid$topicGuess, topic_valid$topicActual)
metrics$bylabel

# Threat topic has weakest performance; what happens if you remove segments that keyATM says are "Threat"?
# topicActual and topicGuess are factors whose labels are capitalised
# ("Threat"), so the filter has to match that label: comparing against the raw
# code "threat" is TRUE for every non-missing row and would drop nothing.
# which() additionally drops rows where the compared topic is NA.
topic_valid_2 <- topic_valid[which(topic_valid$topicActual != "Threat"), ]
crf_evaluation(topic_valid_2$topicGuess, topic_valid_2$topicActual)

# Threat topic has weakest performance; what happens if you remove segments that RA says are "Threat"?
topic_valid_3 <- topic_valid[which(topic_valid$topicGuess != "Threat"), ]
crf_evaluation(topic_valid_3$topicGuess, topic_valid_3$topicActual)



#### Appendix 7.5.3: Counsel Congruence with Predispositions ####

# Store administration as a factor with an explicit level order; the first
# level (Truman) is the baseline under R's default treatment contrasts
segsATM$admin <- factor(
  segsATM$admin,
  levels = c("Truman", "Eisenhower", "Kennedy", "Johnson",
             "Nixon", "Ford", "Carter", "Reagan")
)

# Relationship between topic prevalence and actor's hawkishness:
# one linear model per topic outcome, all sharing the same right-hand side
# (hawkMean, formal, an indicator for doctype == "Transcript", and
# administration dummies)
violence_all <- lm(
  violence ~ hawkMean + formal + I(doctype == "Transcript") + factor(admin),
  data = segsATM
)

threat_all <- lm(
  threat ~ hawkMean + formal + I(doctype == "Transcript") + factor(admin),
  data = segsATM
)

balance_all <- lm(
  balance ~ hawkMean + formal + I(doctype == "Transcript") + factor(admin),
  data = segsATM
)

diplo_all <- lm(
  diplo ~ hawkMean + formal + I(doctype == "Transcript") + factor(admin),
  data = segsATM
)

adversary_all <- lm(
  adversary ~ hawkMean + formal + I(doctype == "Transcript") + factor(admin),
  data = segsATM
)

#### Table A49: Hawkishness and Speech Act Content ####
# One column per topic model. TRUE is spelled out because T is an ordinary,
# reassignable variable. covariate.labels are given in the order the terms
# appear in the model formulas (hawkMean, formal, the transcript indicator,
# then the non-baseline administrations), so they must be kept in sync with
# the model specification.
stargazer(violence_all, threat_all, balance_all, diplo_all, adversary_all,
          no.space = TRUE,
          align = TRUE,
          omit.stat = c("f", "ser", "adj.rsq", "rsq"),
          digits = 3,
          covariate.labels = c("Speaker Hawkishness", "Formal", "Transcript", "Eisenhower",
                               "Kennedy", "Johnson", "Nixon", "Ford", "Carter", "Reagan"))



#### Figure A17: Effect of Speaker Hawkishness on Topic Proportions in Meeting-Adviser Speech Acts, Using KeyATM Model ####
# Collect the fitted models in a named list instead of looking them up by
# name with get(); the names double as the x-axis factor levels below.
hawkFits <- list(violence_all = violence_all,
                 threat_all = threat_all,
                 balance_all = balance_all,
                 diplo_all = diplo_all,
                 adversary_all = adversary_all)
models <- names(hawkFits)

# Extract the hawkMean estimate and its standard error from each model,
# calling summary() once per model. vapply() returns a 2 x length(models)
# matrix with rows "Estimate" and "Std. Error".
hawkStats <- vapply(
  hawkFits,
  function(fit) summary(fit)$coefficients["hawkMean", c("Estimate", "Std. Error")],
  numeric(2)
)
# unname() so data.frame() below keeps default integer row names
hawkCoef <- unname(hawkStats["Estimate", ])
hawkSE <- unname(hawkStats["Std. Error", ])

plotdata <- data.frame(model = models, hawkCoef, hawkSE)
# 95% interval using the normal approximation (estimate +/- 1.96 * SE)
plotdata$lower <- plotdata$hawkCoef - 1.96 * plotdata$hawkSE
plotdata$upper <- plotdata$hawkCoef + 1.96 * plotdata$hawkSE
plotdata$model <- factor(plotdata$model, levels = models)
# "Yes" when the interval lies entirely on one side of zero
plotdata$signif <- ifelse(
  (plotdata$hawkCoef > 0 & plotdata$lower > 0) |
    (plotdata$hawkCoef < 0 & plotdata$upper < 0),
  "Yes", "No"
)
plotdata$signif <- factor(plotdata$signif, levels = c("Yes", "No"))

# Point = coefficient, range = 95% interval; point shape encodes significance
# (the legend itself is hidden). x-axis labels follow the order of `models`.
atmPlot <- ggplot(plotdata, aes(model, hawkCoef)) +
  geom_pointrange(aes(ymin = lower, ymax = upper, shape = signif)) +
  geom_hline(yintercept = 0, linetype = 2) +
  scale_x_discrete("", labels = c("Violence", "Threat", "Military\nBalance", "Diplomacy", "Adversary\nInterests")) +
  scale_y_continuous("Effect of hawkishness\non topic proportion") +
  theme_bw() +
  scale_shape_manual("95% Significance", values = c(15, 16)) +
  theme(legend.position = "none")

print(atmPlot)

ggsave(filename = "./figures/keyATM_advMtg.pdf", plot = atmPlot,
       height = 3, width = 4.25, units = "in")

